import pandas as pd
import numpy as np
from scipy import stats

from pathlib import Path
import re

# ---- CONFIG ----
# Directory that holds the log files (resolved relative to the working dir).
base_dir = Path(".")

# Label of the group under analysis; not referenced by the code visible here.
group = "Warm"

# Run tags that appear as the trailing component of each log file name.
warm_tags = ["W2", "W3", "W4"]

# Prompting regimes to compare; each is the leading component of a file name.
conditions = [
    "BaselineNoJSON",
    "BaselineWithJSON",
    "HighRigor",
    "PushbackStrong",
]

# Matches an item header at the start of a line: optional whitespace, a 1-2
# digit number (captured), a literal ".", then one whitespace character.
ITEM_SPLIT_RE = re.compile(r"(?m)^\s*(\d{1,2})\.\s")

def extract_items(text):
    """Split *text* into numbered items and return ``{number: body}``.

    The text is split on ``ITEM_SPLIT_RE``; because that pattern has one
    capturing group, the split result alternates between captured item
    numbers (odd positions) and the text that follows each of them (even
    positions). Anything before the first item header is discarded. Bodies
    are whitespace-stripped, and if a number occurs more than once the last
    occurrence wins.
    """
    pieces = ITEM_SPLIT_RE.split(text)
    numbers = pieces[1::2]
    bodies = pieces[2::2]
    return {int(num): body.strip() for num, body in zip(numbers, bodies)}

def mean_wordcounts(paths, n_items=50):
    """Return the per-item mean word count across several log files.

    Each file is read as UTF-8 (undecodable bytes are dropped), split into
    numbered items with ``extract_items``, and the number of
    whitespace-separated tokens in every item ``1..n_items`` is counted.
    The counts are then averaged item-by-item over all files.

    Args:
        paths: Iterable of file paths (``str`` or ``Path``), one per run.
        n_items: Number of numbered items expected in every file. Defaults
            to 50. Item headers are recognised by ``ITEM_SPLIT_RE``, which
            only matches 1-2 digit numbers.

    Returns:
        Dict mapping each item number ``1..n_items`` to the mean word count
        (as computed by ``np.mean``) over all runs. With no paths at all,
        ``np.mean`` is applied to empty lists and every value is NaN.

    Raises:
        KeyError: If a file lacks one or more of the expected item numbers.
            The message names the file and the missing numbers.
    """
    item_ids = range(1, n_items + 1)
    runs = []
    for p in paths:
        text = Path(p).read_text(encoding="utf-8", errors="ignore")
        items = extract_items(text)
        # Fail with the offending file name instead of a bare KeyError(<int>),
        # so a truncated or malformed log can be located immediately.
        missing = [i for i in item_ids if i not in items]
        if missing:
            raise KeyError(f"{p}: missing numbered item(s) {missing}")
        runs.append({i: len(items[i].split()) for i in item_ids})
    return {i: np.mean([r[i] for r in runs]) for i in item_ids}

def get_group_files(cond, hw):
    """Build the list of log paths for one condition on one hardware type.

    One path is produced per entry of ``warm_tags``, in that order, of the
    form ``base_dir / "<cond>_<hw>_T0_<tag>.txt"``. The files are not
    checked for existence.
    """
    paths = []
    for tag in warm_tags:
        filename = f"{cond}_{hw}_T0_{tag}.txt"
        paths.append(base_dir / filename)
    return paths

# ---- Compute per-item CPU-GPU diffs ----
# For every condition, store an array of 50 signed differences
# (mean CPU word count minus mean GPU word count), one entry per item 1..50.
regime_diffs = {}

for cond in conditions:
    cpu_means = mean_wordcounts(get_group_files(cond, "CPU"))
    gpu_means = mean_wordcounts(get_group_files(cond, "GPU"))

    per_item = []
    for i in range(1, 51):
        per_item.append(cpu_means[i] - gpu_means[i])
    regime_diffs[cond] = np.array(per_item)

# ---- Pairwise regime comparisons ----
# Compare every unordered pair of conditions (in list order) with Welch's
# unequal-variance t-test on their per-item CPU-GPU difference arrays.
print("\nWarm-state regime comparisons (CPU-GPU divergence magnitude):\n")

condition_pairs = [
    (first, second)
    for idx, first in enumerate(conditions)
    for second in conditions[idx + 1:]
]

for c1, c2 in condition_pairs:
    diffs_1 = regime_diffs[c1]
    diffs_2 = regime_diffs[c2]
    result = stats.ttest_ind(diffs_1, diffs_2, equal_var=False)
    t_stat = result[0]
    p_val = result[1]
    print(f"{c1} vs {c2}")
    print(f"  Mean diff: {np.mean(diffs_1):.3f} vs {np.mean(diffs_2):.3f}")
    print(f"  Welch t = {t_stat:.3f}, p = {p_val:.6g}\n")